Library loading¶

In [1]:
import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
import warnings
import socket
import holoviews as hv
import plotly.express as px
import kaleido


from matplotlib import pylab
import sys
import yaml
import os
from pandas.api.types import CategoricalDtype
import plotly
import random


import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')
In [2]:
plotly.offline.init_notebook_mode()
In [3]:
%matplotlib inline
In [4]:
import ipynbname
nb_fname = ipynbname.name()
In [5]:
sc.settings.verbosity = 3         # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=50, facecolor='white')
pylab.rcParams['figure.figsize'] = (9, 9)
scanpy==1.8.1 anndata==0.7.6 umap==0.4.6 numpy==1.20.2 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.13.1 python-igraph==0.9.8 louvain==0.7.1 pynndescent==0.5.5

Configure paths¶

In [6]:
hostRoot = "-".join(socket.gethostname().split('-')[0:2])

 
outdir="./outdir"
adataPath = outdir+"/3_polaroid_quickAnno.h5ad"
figDir = "./figures"

leiden3Mapping = {"0":"Encs-1",
"1":"CBC/BRCs-1",
"2":"EXN-1",
"3":"EnCs-2",
"4":"RGCs-1",
"5":"RGCs-2",
"6":"ccRGCs",
"7":"inPCs",
"8":"EXN-2",
"9":"RtCs",
"10":"ER-Cs",
"11":"CBC/BRCs-2"}

Load adata¶

In [7]:
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
In [8]:
adata
Out[8]:
AnnData object with n_obs × n_vars = 18822 × 28371
    obs: 'dataset', 'organoid', 'region', 'type', 'type_region', 'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed', 'leiden0.3', 'leidenAnno'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'mean', 'std'
    uns: 'dataset_colors', 'dendrogram_leidenAnno', 'diffmap_evals', 'draw_graph', 'is.Stressed_colors', 'leiden', 'leiden0.3_colors', 'leidenAnno_colors', 'neighbors', 'organoid_colors', 'pca', 'rank_genes_groups', 'regionContrast_colors', 'region_colors', 'type_colors', 'umap'
    obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

UMAP¶

In [9]:
sc.pl.umap(adata, color=["leidenAnno","type","region","regionContrast"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
In [10]:
# Ordering definition

compositions = pd.DataFrame(adata.obs.groupby(["region","leidenAnno"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby("region")["number_of_cells"].sum()[compositions.region])
compositions["number_of_cells_normByregionANDleidenAnno"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby("leidenAnno")["number_of_cells_normByregion"].sum()[compositions["leidenAnno"]])

leidenOrder = compositions[compositions["region"] == "proximal"].sort_values("number_of_cells_normByregionANDleidenAnno", ascending=False)["leidenAnno"].tolist()
leidenOrder
Out[10]:
['RGCs-2',
 'EXN-2',
 'ccRGCs',
 'Encs-1',
 'RtCs',
 'CBC/BRCs-1',
 'CBC/BRCs-2',
 'RGCs-1',
 'EnCs-2',
 'inPCs',
 'ER-Cs',
 'EXN-1']

Controls only¶

Region by leiden¶

In [11]:
## Not normalized
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata = adata[adata.obs["type"].isin(["control"])]




variable1="region"
variable2="leidenAnno"





compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1, 
             category_orders={variable2:leidenOrder},
              height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)



fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.update_xaxes(tickangle=-45)

fig.write_image(figDir+"/"+nb_fname+"Not_norm.ctlOnly"+".pdf")

fig.show()







## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])





fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable2:leidenOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)


fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.update_xaxes(tickangle=-45)

fig.write_image(figDir+"/"+nb_fname+"Region_Norm.ctlOnly"+".pdf")

fig.show()





## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])




fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable2:leidenOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)

fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )

fig.update_xaxes(tickangle=-45)



fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.ctlOnly"+".pdf")

fig.show()

Polaroids only¶

Region by leiden¶

In [12]:
## Not normalized
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata = adata[adata.obs["type"].isin(["polaroid"])]


variable1="region"
variable2="leidenAnno"





compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1, 
             category_orders={variable2:leidenOrder},
              height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.write_image(figDir+"/"+nb_fname+"Not_norm.polaroidOnly"+".pdf")


fig.show()







## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])




fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable2:leidenOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.write_image(figDir+"/"+nb_fname+"Region_Norm.polaroidOnly"+".pdf")


fig.show()





## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])



fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable2:leidenOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.polaroidOnly"+".pdf")


fig.show()

Polaroids and control¶

Region by leiden¶

In [13]:
## Not normalized
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
#adata = adata[adata.obs["type"].isin(["polaroid"])]


variable1="region"
variable2="leidenAnno"

regionOrder=["proximal","medial","distal","piece1","piece2","piece3"]




compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1, 
             category_orders={variable2:leidenOrder,variable1:regionOrder},
              height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.update_xaxes(tickangle=-45)



#fig.write_image(figDir+"/"+outBaseName+"Not_norm.polaroidOnly"+".pdf")


fig.show()

## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])




fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1, 
              height=800,width=1000, template="plotly_white",
             category_orders={variable2:leidenOrder,variable1:regionOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                        )

fig.update_xaxes(tickangle=-45)

#fig.write_image(figDir+"/"+outBaseName+"Region_Norm.polaroidOnly"+".pdf")


fig.show()





## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])



fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1, 
              height=800,width=1000, template="plotly_white",
             category_orders={variable2:leidenOrder,variable1:regionOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.update_xaxes(tickangle=-45)

fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.AllByRegion."+".pdf")


fig.show()

polaroids vs controls¶

Region by leiden¶

In [14]:
## Not normalized
variable1="type"
variable2="leidenAnno"

adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()

compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDleidenAnno"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])

#leidenOrder = compositions[compositions[variable1] == "polaroid"].sort_values("number_of_cells_normByregionANDleidenAnno", ascending=False)[variable2].tolist()
leidenOrder













compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1, 
             category_orders={variable2:leidenOrder},
              height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.update_xaxes(tickangle=-45)


fig.write_image(figDir+"/"+nb_fname+"not_Norm.both"+".pdf")

fig.show()







## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])




fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable2:leidenOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )
fig.update_xaxes(tickangle=-45)


fig.write_image(figDir+"/"+nb_fname+"Region_Norm.both"+".pdf")

fig.show()





## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])



fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable2:leidenOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)

fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)), 
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )

fig.update_xaxes(tickangle=-45)


                                
fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.both"+".pdf", scale=3)


fig.show()

UMAPs by organoid¶

In [15]:
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
In [16]:
for org in adata.obs.organoid.unique() :
    print("Plot for organoid {}".format(org))
    print("Plot for organoid {}".format(org))

    sc.pl.umap(adata, color=["organoid"],groups= org, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
    adataSS = adata[adata.obs.organoid==org]
    sc.pl.umap(adataSS, color=["leidenAnno"], size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
    sc.pl.umap(adataSS, color=["organoid"], size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
Plot for organoid control3
Plot for organoid control3
Plot for organoid control1
Plot for organoid control1
Plot for organoid control2
Plot for organoid control2
Plot for organoid polaroid3
Plot for organoid polaroid3
Plot for organoid polaroid2
Plot for organoid polaroid2
Plot for organoid polaroid1
Plot for organoid polaroid1

SP8/EMX1 positive cells¶

In [17]:
markersDblPositive = ("SP8","EMX1")
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata.obs["{}-{}+".format(markersDblPositive[0],markersDblPositive[1])] = np.nan
adata.obs.loc[((adata.raw.to_adata()[:,markersDblPositive[0]].X.todense().A1 > 0 ) & (adata.raw.to_adata()[:,markersDblPositive[1]].X.todense().A1 > 0 )),"{}-{}+".format(markersDblPositive[0],markersDblPositive[1])] = "{}-{}+".format(markersDblPositive[0],markersDblPositive[1])
In [18]:
sc.pl.umap(adata, color=["{}-{}+".format(markersDblPositive[0],markersDblPositive[1])], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
... storing 'SP8-EMX1+' as categorical
In [19]:
with open("./colorMap.yaml", 'r') as f:
    colorMap = yaml.load(f, Loader=yaml.FullLoader)["uns_colors"]
    
catOrder = ["piece1","piece2","piece3","distal","medial","proximal"]
In [20]:
import holoviews as hv
obsTupleToMap = ("region","{}-{}+".format(markersDblPositive[0],markersDblPositive[1]))
SankeyDF=adata.obs[list(obsTupleToMap)]
SankeyDF.columns = [obsTupleToMap[0],obsTupleToMap[1]]
SankeyDF = SankeyDF.groupby([obsTupleToMap[0],obsTupleToMap[1]]).size().reset_index().rename(columns={0:'count'})
SankeyDF=SankeyDF[SankeyDF["count"] != 0]
hv.extension('bokeh')

print(adata.obs[obsTupleToMap[0]].value_counts())
print(pd.crosstab(adata.obs[obsTupleToMap[0]],adata.obs[obsTupleToMap[1]]))

SankeyDF.region = SankeyDF.region.cat.set_categories(catOrder)
SankeyDF = SankeyDF.sort_values(["region"])

sankey1 = hv.Sankey(SankeyDF, kdims=[obsTupleToMap[0],obsTupleToMap[1]], vdims="count")

colorDict =dict(zip(adata.obs[obsTupleToMap[0]].unique().tolist(),[colorMap[i]["color"] for i in adata.obs[obsTupleToMap[0]].unique().tolist()]))

sankey1.opts(cmap=colorDict,label_position='outer',
edge_color=obsTupleToMap[0], edge_line_width=0,
node_alpha=1.0, node_width=40, node_sort=False,
width=1100, height=700, bgcolor="white")
proximal    6093
medial      3019
piece3      2918
piece2      2892
piece1      2384
distal      1516
Name: region, dtype: int64
SP8-EMX1+  SP8-EMX1+
region              
distal            43
medial            13
piece1            33
piece2            19
piece3            12
proximal          24
Out[20]:
In [21]:
markersDblPositive[1]
Out[21]:
'EMX1'
In [22]:
variable1="region"
variable2="{}-{}+".format(markersDblPositive[0],markersDblPositive[1])
#variable2="SP8-EMX1+"




compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normBy"+variable1] = (np.array(compositions["number_of_cells"]) / adata.obs[variable1].value_counts().loc[compositions[variable1]]).tolist()


print(compositions)

fig = px.bar(compositions, x=variable2, y="number_of_cells_normBy"+variable1, color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable1:catOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"]))
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)

fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )




fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.{}-{}+".format(markersDblPositive[0],markersDblPositive[1])+".pdf")

fig.show()
     region  SP8-EMX1+  number_of_cells  number_of_cells_normByregion
0    distal  SP8-EMX1+               43                      0.028364
1    medial  SP8-EMX1+               13                      0.004306
2    piece1  SP8-EMX1+               33                      0.013842
3    piece2  SP8-EMX1+               19                      0.006570
4    piece3  SP8-EMX1+               12                      0.004112
5  proximal  SP8-EMX1+               24                      0.003939

SATB2 /BCL11B positive cells¶

In [23]:
markersDblPositive = ("SATB2","BCL11B")
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] =  adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata.obs["{}-{}+".format(markersDblPositive[0],markersDblPositive[1])] = np.nan
adata.obs.loc[((adata.raw.to_adata()[:,markersDblPositive[0]].X.todense().A1 > 0 ) & (adata.raw.to_adata()[:,markersDblPositive[1]].X.todense().A1 > 0 )),"{}-{}+".format(markersDblPositive[0],markersDblPositive[1])] = "{}-{}+".format(markersDblPositive[0],markersDblPositive[1])
In [24]:
sc.pl.umap(adata, color=["{}-{}+".format(markersDblPositive[0],markersDblPositive[1])], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
... storing 'SATB2-BCL11B+' as categorical
In [25]:
with open("./colorMap.yaml", 'r') as f:
    colorMap = yaml.load(f, Loader=yaml.FullLoader)["uns_colors"]
    
catOrder = ["piece1","piece2","piece3","distal","medial","proximal"]
In [26]:
import holoviews as hv
obsTupleToMap = ("region","{}-{}+".format(markersDblPositive[0],markersDblPositive[1]))
SankeyDF=adata.obs[list(obsTupleToMap)]
SankeyDF.columns = [obsTupleToMap[0],obsTupleToMap[1]]
SankeyDF = SankeyDF.groupby([obsTupleToMap[0],obsTupleToMap[1]]).size().reset_index().rename(columns={0:'count'})
SankeyDF=SankeyDF[SankeyDF["count"] != 0]
hv.extension('bokeh')

print(adata.obs[obsTupleToMap[0]].value_counts())
print(pd.crosstab(adata.obs[obsTupleToMap[0]],adata.obs[obsTupleToMap[1]]))

SankeyDF.region = SankeyDF.region.cat.set_categories(catOrder)
SankeyDF = SankeyDF.sort_values(["region"])

sankey1 = hv.Sankey(SankeyDF, kdims=[obsTupleToMap[0],obsTupleToMap[1]], vdims="count")

colorDict =dict(zip(adata.obs[obsTupleToMap[0]].unique().tolist(),[colorMap[i]["color"] for i in adata.obs[obsTupleToMap[0]].unique().tolist()]))

sankey1.opts(cmap=colorDict,label_position='outer',
edge_color=obsTupleToMap[0], edge_line_width=0,
node_alpha=1.0, node_width=40, node_sort=False,
width=1100, height=700, bgcolor="white")
proximal    6093
medial      3019
piece3      2918
piece2      2892
piece1      2384
distal      1516
Name: region, dtype: int64
SATB2-BCL11B+  SATB2-BCL11B+
region                      
distal                    17
medial                    25
piece1                    22
piece2                    32
piece3                    32
proximal                  40
Out[26]:
In [27]:
variable1="region"
variable2="{}-{}+".format(markersDblPositive[0],markersDblPositive[1])
#variable2="SP8-EMX1+"




compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normBy"+variable1] = (np.array(compositions["number_of_cells"]) / adata.obs[variable1].value_counts().loc[compositions[variable1]]).tolist()


print(compositions)

fig = px.bar(compositions, x=variable2, y="number_of_cells_normBy"+variable1, color=variable1, 
              height=800,width=1000, template="plotly_white",
                          category_orders={variable1:catOrder},

             color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"]))
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)

fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
                  xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
                              )




fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.{}-{}+".format(markersDblPositive[0],markersDblPositive[1])+".pdf")

fig.show()
     region  SATB2-BCL11B+  number_of_cells  number_of_cells_normByregion
0    distal  SATB2-BCL11B+               17                      0.011214
1    medial  SATB2-BCL11B+               25                      0.008281
2    piece1  SATB2-BCL11B+               22                      0.009228
3    piece2  SATB2-BCL11B+               32                      0.011065
4    piece3  SATB2-BCL11B+               32                      0.010966
5  proximal  SATB2-BCL11B+               40                      0.006565